home *** CD-ROM | disk | FTP | other *** search
- /*
- * boolean_op.c --
- * SCCS Status : %W% %G%
- * Author : Huynh Quoc T. Tung
- * Created On : Sun Oct 17 16:40:46 1993
- * Last Modified By: Huynh Quoc T. Tung
- * Last Modified On: Mon May 30 14:28:41 1994
- * Update Count : 27
- * Status : Unknown, Use with caution!
- */
-
- /********************* PROCEDURE DESCRIPTION ************************
- * Data structure used is the pushdown stack.
- * Basic operations:
- * push(v)
- * long v;
- *
- * push an item onto the stack (insert it at the beginning)
- *
- * pop() : pop an item (remove it from the beginning)
- *
- * void Union(result, set1, set2)
- * search_result_struct *result;
- * search_result_struct *set1;
- * search_result_struct *set2;
- *
- * union two sets and write it into result.
- *
- * void Intersection(result, set1, set2)
- * search_result_struct * result;
- * search_result_struct * set1;
- * search_result_struct * set2;
- *
- * intersection two sets and write it into result.
- *
- * void Difference(result, set1, set2)
- * search_result_struct *result;
- * search_result_struct *set1;
- * search_result_struct *set2;
- *
- * elements of set1 which are common to elements of set2 will be ignored.
- *
- */
-
- /*
- #include <stdio.h>
- #include <string.h>
- */
- #include "cdialect.h"
- #include "irfiles.h"
- #include "boolean_op.h"
- #ifdef NEW_WEIGHT /* tung , 5/94 */
- #include "irtfiles.h"
- #endif
-
- struct node
- {
- long key;
- struct node *next;
- };
- struct node *head, *z, *t;
-
- search_result_struct *end_result = NULL;
-
- boolean IsOperator(op)
- char *op;
- {
- if(!strcmp(op,"and") ||
- !strcmp(op,"or") ||
- !strcmp(op,"not"))
- return(true);
- else return(false);
- }
-
- void stackinit()
- {
- head = (struct node *) malloc(sizeof (* head));
- z = (struct node *) malloc(sizeof(* z));
- head->next = z;
- head->key = 0;
- z->next = z;
- }
-
- void push(v)
- long v;
- {
- t = (struct node *) malloc(sizeof(* t));
- t->key = v;
- t->next = head->next;
- head->next = t;
- }
-
- long pop()
- {
- long x;
-
- t = head->next;
- head->next = t->next;
- x = t->key;
- free(t);
- return x;
- }
-
- int stackempty()
- {
- return head->next == z;
- }
-
- void Union(result, set1, set2)
- search_result_struct *result;
- search_result_struct *set1;
- search_result_struct *set2;
- {
- while((set1->number_of_hits != 0) &&
- (set2->number_of_hits != 0)) {
- if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set1->number_of_hits;
- ++set1->doc_ids_array;
- }
- else if(set1->doc_ids_array->doc_id > set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set2->doc_ids_array->doc_id;
- result->doc_ids_array->score = set2->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set2->number_of_hits;
- ++set2->doc_ids_array;
- }
- else { /* doc_id1 == doc_id2 */
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set1->number_of_hits; --set2->number_of_hits;
- ++set1->doc_ids_array; ++set2->doc_ids_array;
- }
- }
- if((set1->number_of_hits == 0) &&
- (set2->number_of_hits != 0)) {
- memcpy((char *)result->doc_ids_array,
- (char *)set2->doc_ids_array,
- set2->number_of_hits * sizeof(doc_descr_struct));
- set2->doc_ids_array += set2->number_of_hits;
- result->doc_ids_array += set2->number_of_hits;
- result->number_of_hits += set2->number_of_hits;
- set2->number_of_hits = 0;
- }
- else if((set1->number_of_hits != 0) &&
- (set2->number_of_hits == 0)) {
- memcpy((char *)result->doc_ids_array,
- (char *)set1->doc_ids_array,
- set1->number_of_hits * sizeof(doc_descr_struct));
- set1->doc_ids_array += set1->number_of_hits;
- result->doc_ids_array += set1->number_of_hits;
- result->number_of_hits += set1->number_of_hits;
- set1->number_of_hits = 0;
- }
- }
-
- void Or_Operator(op1, op2)
- search_result_struct* op1;
- search_result_struct* op2;
- {
- long doc_ids_array_size = sizeof(doc_descr_struct);
- long op1_number_of_hits, op2_number_of_hits;
-
- op1_number_of_hits = op1->number_of_hits;
- op2_number_of_hits = op2->number_of_hits;
-
- if(op1->number_of_hits == 0) {
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->number_of_hits > 0) {
- memcpy((char *)end_result->doc_ids_array,
- (char *)op2->doc_ids_array,
- doc_ids_array_size * op2->number_of_hits);
- }
- end_result->number_of_hits = op2->number_of_hits;
- push(op2->operand_id);
- }
- else if(op2->number_of_hits == 0) {
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- if(op1->number_of_hits > 0) {
- memcpy((char *)end_result->doc_ids_array,
- (char *)op1->doc_ids_array,
- doc_ids_array_size * op1->number_of_hits);
- }
- end_result->number_of_hits = op1->number_of_hits;
- push(op1->operand_id);
- }
- else if((op1->number_of_hits != 0) &&
- (op2->number_of_hits != 0)) {
- end_result->number_of_hits = 0;
-
- Union(end_result, op1, op2);
-
- op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
- op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
- end_result->doc_ids_array -= end_result->number_of_hits;
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op1->doc_ids_array);
- if(end_result->number_of_hits > 0) {
- memcpy((char *)op2->doc_ids_array,
- (char *)end_result->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- }
- else {
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- }
- push(op2->operand_id);
- }
- }
-
- void Intersection(result, set1, set2)
- search_result_struct * result;
- search_result_struct * set1;
- search_result_struct * set2;
- {
- while((set1->number_of_hits != 0) &&
- (set2->number_of_hits != 0)) {
- if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- #ifdef NEW_WEIGHT /* tung , 5/94 */
- result->doc_ids_array->score = MINIMUM(set1->doc_ids_array->score, set2->doc_ids_array->score);
- #else
- result->doc_ids_array->score = set1->doc_ids_array->score + set2->doc_ids_array->score;
- #endif
- ++result->number_of_hits;
- ++result->doc_ids_array ;
- --set1->number_of_hits; --set2->number_of_hits;
- ++set1->doc_ids_array ; ++set2->doc_ids_array ;
- }
- else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
- --set1->number_of_hits;
- ++set1->doc_ids_array ;
- }
- else /* doc_id1 > doc_id2 */
- {
- --set2->number_of_hits;
- ++set2->doc_ids_array ;
- }
- }
- }
-
- void And_Operator(op1, op2)
- search_result_struct* op1;
- search_result_struct* op2;
- {
- long doc_ids_array_size = sizeof(doc_descr_struct);
- long op1_number_of_hits, op2_number_of_hits;
-
- op1_number_of_hits = op1->number_of_hits;
- op2_number_of_hits = op2->number_of_hits;
-
- if(op1->number_of_hits == 0) {
- end_result->number_of_hits = 0;
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- push(op1->operand_id);
- }
- else if(op2->number_of_hits == 0) {
- end_result->number_of_hits = 0;
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- push(op2->operand_id);
- }
- else if((op1->number_of_hits != 0) &&
- (op2->number_of_hits != 0)) {
- end_result->number_of_hits = 0;
-
- Intersection(end_result, op1, op2);
-
- op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
- op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
- end_result->doc_ids_array -= end_result->number_of_hits;
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op1->doc_ids_array);
-
- if(end_result->number_of_hits > 0) {
- memcpy((char *)op2->doc_ids_array,
- (char *)end_result->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- }
- else {
- op2->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- }
- push(op2->operand_id);
- }
- }
-
- void Difference(result, set1, set2)
- search_result_struct *result;
- search_result_struct *set1;
- search_result_struct *set2;
- {
- while((set1->number_of_hits != 0) &&
- (set2->number_of_hits != 0)) {
- if(set1->doc_ids_array->doc_id == set2->doc_ids_array->doc_id) {
- #ifdef NEW_WEIGHT /* tung , 5/94 */
- result->doc_ids_array->score = MINIMUM(set1->doc_ids_array->score, 1.0 - set2->doc_ids_array->score);
- if(result->doc_ids_array->score < 0)
- result->doc_ids_array->score = 0;
- #endif
- --set1->number_of_hits; --set2->number_of_hits;
- ++set1->doc_ids_array; ++set2->doc_ids_array;
- }
- else if(set1->doc_ids_array->doc_id < set2->doc_ids_array->doc_id) {
- result->doc_ids_array->doc_id = set1->doc_ids_array->doc_id;
- result->doc_ids_array->score = set1->doc_ids_array->score;
- ++result->number_of_hits;
- ++result->doc_ids_array;
- --set1->number_of_hits;
- ++set1->doc_ids_array;
- }
- else /* doc_id1 > doc_id2 */ {
- --set2->number_of_hits;
- ++set2->doc_ids_array;
- }
- }
- if((set1->number_of_hits != 0) &&
- (set2->number_of_hits == 0)) {
- memcpy((char *)result->doc_ids_array,
- (char *)set1->doc_ids_array,
- set1->number_of_hits * sizeof(doc_descr_struct));
- set1->doc_ids_array += set1->number_of_hits;
- result->doc_ids_array += set1->number_of_hits;
- result->number_of_hits += set1->number_of_hits;
- set1->number_of_hits = 0;
- }
- }
-
- void Not_Operator( op1, op2)
- search_result_struct* op1;
- search_result_struct* op2;
- {
- long doc_ids_array_size = sizeof(doc_descr_struct);
- long op1_number_of_hits, op2_number_of_hits;
-
- op1_number_of_hits = op1->number_of_hits;
- op2_number_of_hits = op2->number_of_hits;
-
- if(op1->number_of_hits == 0) {
- end_result->number_of_hits = 0;
- if(op1->doc_ids_array != NULL)
- s_free(op1->doc_ids_array);
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- push(op1->operand_id);
- }
- else if(op2->number_of_hits == 0) {
- if(op2->doc_ids_array != NULL)
- s_free(op2->doc_ids_array);
- if(op1->number_of_hits > 0) {
- memcpy((char *)end_result->doc_ids_array,
- (char *)op1->doc_ids_array,
- doc_ids_array_size * op1->number_of_hits + 1);
- }
- end_result->number_of_hits = op1->number_of_hits;
- push(op1->operand_id);
- }
- else if((op1->number_of_hits != 0) &&
- (op2->number_of_hits != 0)) {
- end_result->number_of_hits = 0;
-
- Difference(end_result, op1, op2);
-
- op1->doc_ids_array -= op1_number_of_hits - op1->number_of_hits;
- op2->doc_ids_array -= op2_number_of_hits - op2->number_of_hits;
- end_result->doc_ids_array -= end_result->number_of_hits;
- op1->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- if(end_result->number_of_hits > 0) {
- memcpy((char *)op1->doc_ids_array,
- (char *)end_result->doc_ids_array,
- end_result->number_of_hits * doc_ids_array_size);
- }
- else {
- op1->number_of_hits = end_result->number_of_hits;
- s_free(op2->doc_ids_array);
- }
- push(op1->operand_id);
- }
- }
-
- boolean stackinitialized = false;
-
- boolean boolean_operations(operator, result_array)
- char *operator;
- search_result_struct* result_array;
- {
- long operand_id1, operand_id2;
- int i;
-
- operand_id1 = operand_id2 = -1L;
-
- if (!stackinitialized) {
- stackinit();
- stackinitialized = true;
- }
- if (!strcmp(operator, "or")) {
- if(stackempty())
- operand_id1 = -1L;
- else operand_id1 = pop();
- if(stackempty())
- operand_id2 = -1L;
- else operand_id2 = pop();
- if((operand_id1 == -1) || (operand_id2 == -1)) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "boolean search failed, too few operands.\n");
- return(false);
- }
- else Or_Operator(&result_array[operand_id1], &result_array[operand_id2]);
- }
- else if (!strcmp(operator, "and")) {
- if(stackempty())
- operand_id1 = -1L;
- else operand_id1 = pop();
- if(stackempty())
- operand_id2 = -1L;
- else operand_id2 = pop();
- if((operand_id1 == -1) || (operand_id2 == -1)) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "boolean search failed, too few operands.\n");
- return(false);
- }
- else And_Operator(&result_array[operand_id1], &result_array[operand_id2]);
- }
- else if (!strcmp(operator, "not")) {
- if(stackempty())
- operand_id1 = -1L;
- else operand_id1 = pop();
- if(stackempty())
- operand_id2 = -1L;
- else operand_id2 = pop();
- if((operand_id1 == -1) || (operand_id2 == -1)) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "boolean search failed, too few operands.\n");
- return(false);
- }
- else Not_Operator(&result_array[operand_id2], &result_array[operand_id1]);
- }
- }
-
- boolean save_operand_id(operand_id, search_result_array, entries)
- long operand_id;
- search_result_struct* search_result_array;
- long entries;
- {
- if (!stackinitialized) {
- stackinit();
- stackinitialized = true;
- }
- if(end_result == NULL) {
- end_result =
- (search_result_struct *)s_malloc(sizeof(search_result_struct));
- end_result->doc_ids_array =
- (doc_descr_struct *)s_malloc(sizeof(doc_descr_struct) * entries);
- if(end_result->doc_ids_array == NULL) {
- waislog(WLOG_HIGH, WLOG_ERROR, "Out of memory");
- return(false);
- }
- }
- end_result->number_of_hits = search_result_array[operand_id].number_of_hits;
- if(search_result_array[operand_id].doc_ids_array != NULL)
- memcpy((char*)end_result->doc_ids_array,
- (char*)search_result_array[operand_id].doc_ids_array,
- search_result_array[operand_id].number_of_hits *
- sizeof(doc_descr_struct));
- push(operand_id);
- return(true);
- }
-
- long retriev_result(entries, result)
- long entries;
- double* result;
- {
- int doc_id, count;
- long number_of_hits = 0;
-
- if((end_result != NULL) && (result != NULL))
- for(count=0; count < end_result->number_of_hits; count++) {
- doc_id = end_result->doc_ids_array[count].doc_id;
- result[doc_id] = end_result->doc_ids_array[count].score;
- ++number_of_hits;
- }
-
- if(end_result != NULL) {
- if(end_result->doc_ids_array != NULL)
- s_free(end_result->doc_ids_array);
- s_free(end_result);
- }
-
- stackinitialized = false;
-
- return(number_of_hits);
- }
-